0.1 data set 1

chr <- 21
chr_pvalues_grubert <- fst::read.fst("chr_pvalues_grubert_chr21.fst")
head(chr_pvalues_grubert)
##           SNP  gene        beta      tstat    pvalue   beta_se maf_sample
## 1 rs183048582 64684 -0.30947859 -1.1006867 0.2746493 0.2811687 0.07894737
## 2 rs183048582 64685  0.03130456  0.1101570 0.9125872 0.2841814 0.07894737
## 3 rs183048582 64686 -0.04461388 -0.1545630 0.8775924 0.2886453 0.07894737
## 4 rs183048582 64687  0.56033736  2.0317715 0.0458169 0.2757876 0.07894737
## 5 rs183048582 64688  0.09431013  0.3271629 0.7444810 0.2882666 0.07894737
## 6 rs183048582 64689 -0.03752318 -0.1300745 0.8968652 0.2884744 0.07894737
##       svars maf_sample_bin     svars_bin    maf_bin_uneq
## 1 0.4017506              2 [0.364,0.410) [0.0724,0.0987)
## 2 0.4017506              2 [0.364,0.410) [0.0724,0.0987)
## 3 0.4017506              2 [0.364,0.410) [0.0724,0.0987)
## 4 0.4017506              2 [0.364,0.410) [0.0724,0.0987)
## 5 0.4017506              2 [0.364,0.410) [0.0724,0.0987)
## 6 0.4017506              2 [0.364,0.410) [0.0724,0.0987)

#histogram

quick_hist = function(values_vec, breaks=50, title = NULL, threshold_proportion = 10^(-4)) {
    res = hist(values_vec, plot=FALSE, breaks=breaks)
    
    proportion <- mean(values_vec <= threshold_proportion)
    proportion <- formatC(proportion, format = "e", digits = 2)
    
    dat = data.frame(xmin=head(res$breaks, -1L),
                     xmax=tail(res$breaks, -1L),
                     ymin=0.0,
                     ymax=res$counts)

    ggplot(dat, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)) +
      geom_rect(size=0.5, colour="grey30", fill="grey80") + 
      ggtitle(paste(title, "proportion", proportion)) +
      ylab("pvalue count")
}

1 analysis

quick_hist(chr_pvalues_grubert$pvalue, breaks = 400)

chr_pvalues_grubert_split_maf_sample_bin <- chr_pvalues_grubert %>%
  select(pvalue, maf_sample_bin, maf_sample) %>%
  split(chr_pvalues_grubert$maf_sample_bin) 

maf_sample_bin_hist <- lapply(chr_pvalues_grubert_split_maf_sample_bin, function(maf_sample_bin_i){
  title <- unique(maf_sample_bin_i$maf_sample_bin)
  title <- paste0(title, ", ")
  quick_hist(maf_sample_bin_i$pvalue, breaks = 400, title = title)
})

for(maf_sample_bin_i in maf_sample_bin_hist){
  print(maf_sample_bin_i)
}

proportions_maf_sample_bin <- sapply(chr_pvalues_grubert_split_maf_sample_bin, function(maf_sample_bin_i){
  threshold_proportion = 10^(-4)
  proportion <- mean(maf_sample_bin_i$pvalue <= threshold_proportion)
})

plot(names(chr_pvalues_grubert_split_maf_sample_bin), proportions_maf_sample_bin)

chr_pvalues_grubert_split_maf_sample_bin <- chr_pvalues_grubert %>%
  select(pvalue, maf_bin_uneq, maf_sample) %>%
  split(chr_pvalues_grubert$maf_bin_uneq) 

maf_sample_bin_uneq_hist <- lapply(chr_pvalues_grubert_split_maf_sample_bin, function(maf_sample_bin_i){
  title <- unique(maf_sample_bin_i$maf_bin_uneq)
  title <- paste0(title, ", ")
  quick_hist(maf_sample_bin_i$pvalue, breaks = 400, title = title)
})

for(maf_sample_bin_uneq_i in maf_sample_bin_uneq_hist){
  print(maf_sample_bin_uneq_i)
}

#IHW

chr_pvalues_grubert_p_adjust <- p.adjust(chr_pvalues_grubert$pvalue, method = "BH")

sum(chr_pvalues_grubert_p_adjust <= 0.05)
## [1] 1543
ihw_maf_equal <- ihw(chr_pvalues_grubert$pvalue, chr_pvalues_grubert$maf_sample_bin, alpha = 0.05, lambdas = Inf, nfolds = 3)
rejections(ihw_maf_equal)

Gives “Error: cannot allocate vector of size 687.9 Mb”